QC and filter seurat object¶

In [1]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # Harmony 
        library(harmony), 
        
        # Data 
        library(tidyverse), 
        
        # Biomart 
        library(biomaRt)
        
    )
)
In [2]:
random_seed <- 42
set.seed(random_seed)
In [3]:
options(warn=-1)
In [4]:
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")
In [5]:
# Source files
source("plotting_global.R")
source("bin/SeuratQC.R")

# SingleRPlot
source("bin/SingleRQC.R")

Parameter settings¶

In [6]:
# Files 
so_file <- "data/object/qc.rds"

# Plotting Theme
ggplot2::theme_set(theme_global_set()) # From project global source()

Import Seurat object¶

In [7]:
so <- readRDS(so_file)

Dimensional reduction and clustering of scaled data¶

In [8]:
so <- FindVariableFeatures(so, assay="RNA", nfeatures=2000)
so <- ScaleData(so, assay="RNA")
so <- RunPCA(so, npcs=50, assay="RNA", verbose=FALSE)
so <- FindNeighbors(so, dims=1:30, k.param=30, verbose=FALSE)
so <- FindClusters(so, soverbose=FALSE)
so <- RunUMAP(so, dims=1:50, n.neighbors=30, verbose=FALSE)
Centering and scaling data matrix

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 22246
Number of edges: 752436

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8917
Number of communities: 22
Elapsed time: 2 seconds
In [9]:
options(repr.plot.width=22.5, repr.plot.height=15)
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters")
dplot_2 <- dplot(so, reduction="umap", group_by="tissue") + scale_color_manual(values=color$tissue)
dplot_3 <- dplot(so, reduction="umap", group_by="treatment") + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so, reduction="umap", group_by="main_labels") + scale_color_manual(values=color$main_labels_haemosphere)
dplot_5 <- dplot(so, reduction="umap", group_by="fine_labels") + scale_color_manual(values=color$fine_labels_haemosphere)
dplot_6 <- dplot(so, reduction="umap", group_by="cc_phase_class") + scale_color_manual(values=color$cc_phase_class)
fplot_1 <- fplot(so, reduction="umap", features="pMt_RNA")
fplot_2 <- fplot(so, reduction="umap", features="pHb_RNA")
fplot_3 <- fplot(so, reduction="umap", features="pRb_RNA")
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + dplot_6 + fplot_1 + fplot_2 + fplot_3

Harmony integration¶

In [10]:
so <- RunHarmony(so, group.by.vars="treatment", max.iter.harmony=100, plot_convergence=FALSE)
Harmony 1/100

Harmony 2/100

Harmony 3/100

Harmony 4/100

Harmony 5/100

Harmony 6/100

Harmony 7/100

Harmony 8/100

Harmony 9/100

Harmony 10/100

Harmony 11/100

Harmony 12/100

Harmony 13/100

Harmony 14/100

Harmony 15/100

Harmony converged after 15 iterations

In [11]:
so <- FindNeighbors(so, dims=1:30, k.param=30, reduction="harmony", verbose=FALSE)
so <- FindClusters(so, soverbose=FALSE)
so <- RunUMAP(so, dims=1:30, n.neighbors=30, reduction="harmony", verbose=FALSE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 22246
Number of edges: 1376301

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8877
Number of communities: 20
Elapsed time: 4 seconds
In [12]:
options(repr.plot.width=22.5, repr.plot.height=15)
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters")
dplot_2 <- dplot(so, reduction="umap", group_by="tissue") + scale_color_manual(values=color$tissue)
dplot_3 <- dplot(so, reduction="umap", group_by="treatment") + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so, reduction="umap", group_by="main_labels") + scale_color_manual(values=color$main_labels_haemosphere)
dplot_5 <- dplot(so, reduction="umap", group_by="fine_labels") + scale_color_manual(values=color$fine_labels_haemosphere)
dplot_6 <- dplot(so, reduction="umap", group_by="cc_phase_class") + scale_color_manual(values=color$cc_phase_class)
fplot_1 <- fplot(so, reduction="umap", features="pMt_RNA")
fplot_2 <- fplot(so, reduction="umap", features="pHb_RNA")
fplot_3 <- fplot(so, reduction="umap", features="pRb_RNA")
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + dplot_6 + fplot_1 + fplot_2 + fplot_3

Seurat SCTransform integration¶

In [13]:
so <- SplitObject(so, split.by="treatment")
In [14]:
so <- lapply(so, function(so) {
    
    so <- SCTransform(so, assay="RNA", variable.features.n=3000, verbose=FALSE)
    so <- RunPCA(so, npcs=100, assay="SCT", verbose=FALSE)
    so <- FindNeighbors(so, dims=1:30, k.param=30, verbose=FALSE)
    so <- FindClusters(so, soverbose=FALSE)
    so <- RunUMAP(so, dims=1:30, n.neighbors=30, verbose=FALSE)
    
}
               )
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8704
Number of edges: 475321

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8793
Number of communities: 18
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 13542
Number of edges: 742098

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8775
Number of communities: 17
Elapsed time: 1 seconds
In [15]:
features <- SelectIntegrationFeatures(object.list=so, nfeatures=3000)
so <- PrepSCTIntegration(object.list=so, anchor.features=features)
anchors <- FindIntegrationAnchors(object.list=so, anchor.features=features, normalization.method="SCT")
so <- IntegrateData(anchorset=anchors, normalization.method="SCT")
Finding all pairwise anchors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 21554 anchors

Filtering anchors

	Retained 12838 anchors

Merging dataset 1 into 2

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

In [16]:
so <- RunPCA(so, npcs=30, assay="integrated", verbose=FALSE)
so <- FindNeighbors(so, dims=1:30, k.param=30, verbose=FALSE)
so <- FindClusters(so, soverbose=FALSE)
so <- RunUMAP(so, dims=1:30, n.neighbors=30, verbose=FALSE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 22246
Number of edges: 1296603

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8894
Number of communities: 19
Elapsed time: 4 seconds
In [17]:
options(repr.plot.width=22.5, repr.plot.height=15)
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters")
dplot_2 <- dplot(so, reduction="umap", group_by="tissue") + scale_color_manual(values=color$tissue)
dplot_3 <- dplot(so, reduction="umap", group_by="treatment") + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so, reduction="umap", group_by="main_labels") + scale_color_manual(values=color$main_labels_haemosphere)
dplot_5 <- dplot(so, reduction="umap", group_by="fine_labels") + scale_color_manual(values=color$fine_labels_haemosphere)
dplot_6 <- dplot(so, reduction="umap", group_by="cc_phase_class") + scale_color_manual(values=color$cc_phase_class)
fplot_1 <- fplot(so, reduction="umap", features="pMt_RNA")
fplot_2 <- fplot(so, reduction="umap", features="pHb_RNA")
fplot_3 <- fplot(so, reduction="umap", features="pRb_RNA")
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + dplot_6 + fplot_1 + fplot_2 + fplot_3

Session info¶

In [18]:
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux 8.2 (Ootpa)

Matrix products: default
BLAS/LAPACK: /home/fdeckert/bin/miniconda3/envs/r.4.1.0-FD20200109SPLENO/lib/libopenblasp-r0.3.15.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
 [1] parallel  stats4    grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] viridis_0.6.2               viridisLite_0.4.0          
 [3] SingleR_1.6.1               SingleCellExperiment_1.14.1
 [5] SummarizedExperiment_1.22.0 Biobase_2.52.0             
 [7] GenomicRanges_1.44.0        GenomeInfoDb_1.28.0        
 [9] IRanges_2.26.0              S4Vectors_0.30.0           
[11] BiocGenerics_0.38.0         MatrixGenerics_1.4.0       
[13] matrixStats_0.61.0          ComplexHeatmap_2.8.0       
[15] patchwork_1.1.1             RColorBrewer_1.1-2         
[17] biomaRt_2.48.2              forcats_0.5.1              
[19] stringr_1.4.0               dplyr_1.0.7                
[21] purrr_0.3.4                 readr_2.0.2                
[23] tidyr_1.1.4                 tibble_3.1.6               
[25] ggplot2_3.3.5               tidyverse_1.3.1            
[27] harmony_0.1.0               Rcpp_1.0.7                 
[29] SeuratObject_4.0.2          Seurat_4.0.5               

loaded via a namespace (and not attached):
  [1] utf8_1.2.2                reticulate_1.22          
  [3] tidyselect_1.1.1          RSQLite_2.2.5            
  [5] AnnotationDbi_1.54.0      htmlwidgets_1.5.4        
  [7] BiocParallel_1.26.0       Rtsne_0.15               
  [9] ScaledMatrix_1.0.0        munsell_0.5.0            
 [11] codetools_0.2-18          ica_1.0-2                
 [13] pbdZMQ_0.3-5              future_1.23.0            
 [15] miniUI_0.1.1.1            withr_2.4.2              
 [17] colorspace_2.0-2          filelock_1.0.2           
 [19] uuid_0.1-4                rstudioapi_0.13          
 [21] ROCR_1.0-11               tensor_1.5               
 [23] listenv_0.8.0             labeling_0.4.2           
 [25] repr_1.1.3                GenomeInfoDbData_1.2.6   
 [27] polyclip_1.10-0           farver_2.1.0             
 [29] bit64_4.0.5               parallelly_1.28.1        
 [31] vctrs_0.3.8               generics_0.1.1           
 [33] BiocFileCache_2.0.0       R6_2.5.1                 
 [35] doParallel_1.0.16         clue_0.3-59              
 [37] rsvd_1.0.5                DelayedArray_0.18.0      
 [39] bitops_1.0-7              spatstat.utils_2.2-0     
 [41] cachem_1.0.6              assertthat_0.2.1         
 [43] promises_1.2.0.1          scales_1.1.1             
 [45] gtable_0.3.0              beachmat_2.8.0           
 [47] Cairo_1.5-12.2            globals_0.14.0           
 [49] goftest_1.2-3             rlang_0.4.12             
 [51] GlobalOptions_0.1.2       splines_4.1.0            
 [53] lazyeval_0.2.2            spatstat.geom_2.4-0      
 [55] broom_0.7.10              reshape2_1.4.4           
 [57] abind_1.4-5               modelr_0.1.8             
 [59] backports_1.3.0           httpuv_1.6.3             
 [61] tools_4.1.0               ellipsis_0.3.2           
 [63] spatstat.core_2.3-1       ggridges_0.5.3           
 [65] plyr_1.8.6                sparseMatrixStats_1.4.0  
 [67] base64enc_0.1-3           progress_1.2.2           
 [69] zlibbioc_1.38.0           RCurl_1.98-1.3           
 [71] prettyunits_1.1.1         rpart_4.1-15             
 [73] deldir_1.0-6              pbapply_1.5-0            
 [75] GetoptLong_1.0.5          cowplot_1.1.1            
 [77] zoo_1.8-9                 haven_2.4.3              
 [79] ggrepel_0.9.1             cluster_2.1.2            
 [81] fs_1.5.0                  magrittr_2.0.1           
 [83] RSpectra_0.16-0           data.table_1.14.2        
 [85] scattermore_0.7           circlize_0.4.13          
 [87] lmtest_0.9-39             reprex_2.0.1             
 [89] RANN_2.6.1                fitdistrplus_1.1-6       
 [91] hms_1.1.1                 mime_0.12                
 [93] evaluate_0.14             xtable_1.8-4             
 [95] XML_3.99-0.8              readxl_1.3.1             
 [97] gridExtra_2.3             shape_1.4.6              
 [99] compiler_4.1.0            KernSmooth_2.23-20       
[101] crayon_1.4.2              htmltools_0.5.2          
[103] mgcv_1.8-36               later_1.3.0              
[105] tzdb_0.2.0                lubridate_1.8.0          
[107] DBI_1.1.1                 dbplyr_2.1.1             
[109] MASS_7.3-54               rappdirs_0.3.3           
[111] Matrix_1.3-4              cli_3.1.0                
[113] igraph_1.2.9              pkgconfig_2.0.3          
[115] IRdisplay_1.0             plotly_4.10.0            
[117] spatstat.sparse_2.0-0     xml2_1.3.2               
[119] foreach_1.5.1             XVector_0.32.0           
[121] rvest_1.0.2               digest_0.6.28            
[123] sctransform_0.3.2         RcppAnnoy_0.0.19         
[125] spatstat.data_2.1-0       Biostrings_2.60.0        
[127] cellranger_1.1.0          leiden_0.3.9             
[129] uwot_0.1.10               DelayedMatrixStats_1.14.0
[131] curl_4.3.2                shiny_1.7.1              
[133] rjson_0.2.20              lifecycle_1.0.1          
[135] nlme_3.1-152              jsonlite_1.7.2           
[137] BiocNeighbors_1.10.0      fansi_0.5.0              
[139] pillar_1.6.4              lattice_0.20-44          
[141] KEGGREST_1.32.0           fastmap_1.1.0            
[143] httr_1.4.2                survival_3.2-11          
[145] glue_1.5.0                png_0.1-7                
[147] iterators_1.0.13          bit_4.0.4                
[149] stringi_1.7.5             blob_1.2.1               
[151] BiocSingular_1.8.0        memoise_2.0.0            
[153] IRkernel_1.2              irlba_2.3.3              
[155] future.apply_1.8.1